#!/usr/bin/python3
#coding=utf-8
import scrapy
from spiderTest.items import TestItem

class shicheSpider(scrapy.Spider):
    '''
    Spider that collects link titles and hrefs from the page(s) listed in
    ``start_urls`` (the shiche.com.cn root URL).
    '''
    name = 'shiche'
    allowed_domains = ["shiche.com.cn"]
    start_urls = [
        'http://www.shiche.com.cn'
    ]

    def parse(self, response):
        '''
        Yield one ``TestItem`` for every ``<li class="circle-dot-v1">`` element
        that has a direct ``<a>`` child with both text and an ``href``.

        A parse callback may produce two kinds of results: links that need
        further crawling, and data that can be stored. This callback only
        produces the latter. XPath is a language for selecting nodes from
        XML (HTML) documents.

        :param response: response object exposing ``xpath()``; the page to scrape
        :return: generator of ``TestItem`` objects with ``title`` and ``href`` set
        '''
        for article in response.xpath('//li[@class="circle-dot-v1"]'):
            # extract_first() gives the first match as a unicode string, or
            # None when nothing matched (instead of raising IndexError).
            title = article.xpath('a/text()').extract_first()
            href = article.xpath('a/@href').extract_first()
            if title is None or href is None:
                # Entry without a usable link: skip it rather than abort the page.
                continue
            # Build a fresh item per entry; reusing one instance would make
            # every yielded item alias the same, repeatedly overwritten object.
            item = TestItem()
            item['title'] = title
            item['href'] = href
            yield item
